Data source: https://www.kaggle.com/datasets/rajyellow46/wine-quality
# Load in some packages
import calendar
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Silence every warning category for the remainder of the session.
warnings.filterwarnings(action="ignore")
# Read the wine-quality CSV from its local, machine-specific location.
wine_df = pd.read_csv(filepath_or_buffer=r"C:\Users\jki\Downloads\wine.csv")
# Preview the first five rows.
wine_df.head(n=5)
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
1 | 7.8 | 0.88 | 0.00 | 2.6 | 0.098 | 25.0 | 67.0 | 0.9968 | 3.20 | 0.68 | 9.8 | 5 |
2 | 7.8 | 0.76 | 0.04 | 2.3 | 0.092 | 15.0 | 54.0 | 0.9970 | 3.26 | 0.65 | 9.8 | 5 |
3 | 11.2 | 0.28 | 0.56 | 1.9 | 0.075 | 17.0 | 60.0 | 0.9980 | 3.16 | 0.58 | 9.8 | 6 |
4 | 7.4 | 0.70 | 0.00 | 1.9 | 0.076 | 11.0 | 34.0 | 0.9978 | 3.51 | 0.56 | 9.4 | 5 |
# Count the missing (NaN) entries in every column, then print the per-column tally.
missing_values = wine_df.isna().sum(axis=0)
print(missing_values)
fixed acidity 0 volatile acidity 0 citric acid 0 residual sugar 0 chlorides 0 free sulfur dioxide 0 total sulfur dioxide 0 density 0 pH 0 sulphates 0 alcohol 0 quality 0 dtype: int64
# Inspect the column dtypes, non-null counts and memory usage of the frame.
wine_df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 1599 entries, 0 to 1598 Data columns (total 12 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 fixed acidity 1599 non-null float64 1 volatile acidity 1599 non-null float64 2 citric acid 1599 non-null float64 3 residual sugar 1599 non-null float64 4 chlorides 1599 non-null float64 5 free sulfur dioxide 1599 non-null float64 6 total sulfur dioxide 1599 non-null float64 7 density 1599 non-null float64 8 pH 1599 non-null float64 9 sulphates 1599 non-null float64 10 alcohol 1599 non-null float64 11 quality 1599 non-null int64 dtypes: float64(11), int64(1) memory usage: 150.0 KB
# Summary statistics (count, mean, std, min, quartiles, max) for fixed acidity.
wine_df["fixed acidity"].describe()
count 1599.000000 mean 8.319637 std 1.741096 min 4.600000 25% 7.100000 50% 7.900000 75% 9.200000 max 15.900000 Name: fixed acidity, dtype: float64
The average fixed acidity is 8.32.
import plotly.express as px
# Bar chart of wine quality against fixed acidity, coloured by quality score.
# Expects wine_df to provide the 'fixed acidity' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='fixed acidity',
    y='quality',
    color='quality',
    height=320,
    labels={
        'quality': 'Wine Quality',
        'fixed acidity': 'Fixed Acidity',
    },
    title='Relationship between Fixed Acidity and Wine Quality',
)
# Tilt the x-axis tick labels so they remain readable.
fig.update_layout(xaxis_tickangle=-45)
# Render the figure.
fig.show()
Fixed acidity levels at 7 produce the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for volatile acidity.
wine_df["volatile acidity"].describe()
count 1599.000000 mean 0.527821 std 0.179060 min 0.120000 25% 0.390000 50% 0.520000 75% 0.640000 max 1.580000 Name: volatile acidity, dtype: float64
The average volatile acidity is 0.53.
import plotly.express as px
# Bar chart of wine quality against volatile acidity.
# Expects wine_df to provide the 'volatile acidity' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='volatile acidity',
    y='quality',
    height=320,
    labels={
        'quality': 'Wine Quality',
        'volatile acidity': 'Volatile Acidity',
    },
    title='Relationship between Volatile Acidity and Wine Quality',
)
# Tilt the x-axis tick labels so they remain readable.
fig.update_layout(xaxis_tickangle=-45)
# Render the figure.
fig.show()
Volatile acidity levels of 0.5 and 0.6 produce the best wine quality.
# Summary statistics (count, mean, std, min, quartiles, max) for citric acid.
wine_df["citric acid"].describe()
count 1599.000000 mean 0.270976 std 0.194801 min 0.000000 25% 0.090000 50% 0.260000 75% 0.420000 max 1.000000 Name: citric acid, dtype: float64
import plotly.express as px
# Bar chart of wine quality against citric acid.
# Expects wine_df to provide the 'citric acid' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='citric acid',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly to take effect.
    labels={'quality': 'Wine Quality', 'citric acid': 'Citric Acid'},
    title='Relationship between Citric Acid and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
A citric acid level of 0 produces the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for residual sugar.
wine_df["residual sugar"].describe()
count 1599.000000 mean 2.538806 std 1.409928 min 0.900000 25% 1.900000 50% 2.200000 75% 2.600000 max 15.500000 Name: residual sugar, dtype: float64
The average residual sugar is 2.54.
import plotly.express as px
# Bar chart of wine quality against residual sugar.
# Expects wine_df to provide the 'residual sugar' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='residual sugar',
    y='quality',
    height=320,
    # One entry per column; a repeated key would silently drop the first
    # mapping and leave the quality axis unlabelled.
    labels={'quality': 'Wine Quality', 'residual sugar': 'Residual Sugar'},
    title='Relationship between Residual Sugar and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Residual sugar levels of 2 contribute to the best wine quality.
# Summary statistics (count, mean, std, min, quartiles, max) for chlorides.
wine_df["chlorides"].describe()
count 1599.000000 mean 0.087467 std 0.047065 min 0.012000 25% 0.070000 50% 0.079000 75% 0.090000 max 0.611000 Name: chlorides, dtype: float64
The average chloride level is 0.087.
import plotly.express as px
# Bar chart of wine quality against chlorides.
# Expects wine_df to provide the 'chlorides' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='chlorides',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters) to take effect.
    labels={'quality': 'Wine Quality', 'chlorides': 'Chlorides'},
    title='Relationship between Chlorides and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Chloride levels of 0.05 produce the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for free sulfur dioxide.
wine_df["free sulfur dioxide"].describe()
count 1599.000000 mean 15.874922 std 10.460157 min 1.000000 25% 7.000000 50% 14.000000 75% 21.000000 max 72.000000 Name: free sulfur dioxide, dtype: float64
The average level of free sulfur dioxide is 15.87.
import plotly.express as px
# Bar chart of wine quality against free sulfur dioxide.
# Expects wine_df to provide the 'free sulfur dioxide' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='free sulfur dioxide',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters) to take effect.
    labels={
        'quality': 'Wine Quality',
        'free sulfur dioxide': 'Free Sulfur Dioxide',
    },
    title='Relationship between Free Sulfur Dioxide and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Free sulfur dioxide levels of 5 produce the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for total sulfur dioxide.
wine_df["total sulfur dioxide"].describe()
count 1599.000000 mean 46.467792 std 32.895324 min 6.000000 25% 22.000000 50% 38.000000 75% 62.000000 max 289.000000 Name: total sulfur dioxide, dtype: float64
The average level of total sulfur dioxide is 46.47.
import plotly.express as px
# Bar chart of wine quality against total sulfur dioxide.
# Expects wine_df to provide the 'total sulfur dioxide' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='total sulfur dioxide',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters) to take effect.
    labels={
        'quality': 'Wine Quality',
        'total sulfur dioxide': 'Total Sulfur Dioxide',
    },
    title='Relationship between Total Sulfur Dioxide and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Total sulfur dioxide levels of 25 produce the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for density.
wine_df["density"].describe()
count 1599.000000 mean 0.996747 std 0.001887 min 0.990070 25% 0.995600 50% 0.996750 75% 0.997835 max 1.003690 Name: density, dtype: float64
The average density is 0.997.
import plotly.express as px
# Bar chart of wine quality against density.
# Expects wine_df to provide the 'density' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='density',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters or typos) to take effect.
    labels={'quality': 'Wine Quality', 'density': 'Density'},
    title='Relationship between Density and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Density between 0.996 and 0.998 produces the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for pH.
wine_df["pH"].describe()
count 1599.000000 mean 3.311113 std 0.154386 min 2.740000 25% 3.210000 50% 3.310000 75% 3.400000 max 4.010000 Name: pH, dtype: float64
The average pH level is 3.311.
import plotly.express as px
# Bar chart of wine quality against pH.
# Expects wine_df to provide the 'pH' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='pH',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters) to take effect; the x axis is pH, not density.
    labels={'quality': 'Wine Quality', 'pH': 'pH'},
    title='Relationship between pH and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
pH levels between 3.3 and 3.4 produce the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for sulphates.
wine_df["sulphates"].describe()
count 1599.000000 mean 0.658149 std 0.169507 min 0.330000 25% 0.550000 50% 0.620000 75% 0.730000 max 2.000000 Name: sulphates, dtype: float64
The average level of sulphates is 0.658.
import plotly.express as px
# Bar chart of wine quality against sulphates.
# Expects wine_df to provide the 'sulphates' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='sulphates',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters) to take effect; the x axis is sulphates, not density.
    labels={'quality': 'Wine Quality', 'sulphates': 'Sulphates'},
    title='Relationship between Sulphates and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Sulphate levels of 0.6 contribute to the best quality of wine.
# Summary statistics (count, mean, std, min, quartiles, max) for alcohol.
wine_df["alcohol"].describe()
count 1599.000000 mean 10.422983 std 1.065668 min 8.400000 25% 9.500000 50% 10.200000 75% 11.100000 max 14.900000 Name: alcohol, dtype: float64
The average alcohol level is 10.42.
import plotly.express as px
# Bar chart of wine quality against alcohol.
# Expects wine_df to provide the 'alcohol' and 'quality' columns.
# NOTE(review): each row is drawn as its own bar segment, so bars sharing an
# x value appear to stack - confirm that summed quality (rather than mean
# quality) is the intended measure.
fig = px.bar(
    wine_df,
    x='alcohol',
    y='quality',
    height=320,
    # Keys must equal the DataFrame column names exactly (no stray quote
    # characters) to take effect; the x axis is alcohol, not density.
    labels={'quality': 'Wine Quality', 'alcohol': 'Alcohol'},
    title='Relationship between Alcohol and Wine Quality',
)
fig.update_layout(xaxis_tickangle=-45)  # Rotate x-axis labels for better readability
# Show the plot
fig.show()
Alcohol levels at 9.5 contribute to the best quality of wine.
1. Fixed acidity levels at 7 produce the best quality of wine.
2. Volatile acidity levels of 0.5 and 0.6 produce the best wine quality.
3. Residual sugar levels of 2 contribute to the best wine quality.
4. Total sulfur dioxide levels of 25 produce the best quality of wine.
5. Density between 0.996 and 0.998 produces the best quality of wine.
6. pH levels between 3.3 and 3.4 produce the best quality of wine.
7. Sulphate levels of 0.6 contribute to the best quality of wine.
8. Alcohol levels at 9.5 contribute to the best quality of wine.